"""This module is used to keep running stats of the whole system or for a 
particular job. This logs the following parameters against a time-stamp:
1. Number of running maps (per job / global)
2. Number of completed maps (per job / global)
3. Number of running reduces (per job / global)
4. Number of completed reduces (per job / global)
5. Data fetched from DFS for Files only (per job / global)
6. Data written to DFS for Files only (per job / global)
7. Data shuffled for Files (per job / global)
8. CPU user %
9. CPU system %
10. Network Tx KB
11. Network Rx KB
"""


import ma.log
import ma.const
import ma.utils.listutils as listutils
import ma.utils.sysinfo as sysinfo
import time
import queue


class Statster(object):
    """Samples job/system statistics on a timer and writes them to the
    "ma.stats" logger as comma-separated rows.

    Each sample records running/completed maps and reduces, DFS read/write
    and shuffle volumes, CPU user/system percentages, and network Rx/Tx KBs.
    Rows are buffered (depth set in the XML config) before being flushed to
    the log, so that file transfers spanning several sampling intervals can
    be spread back over the buffered rows they overlap.
    """
    
    def __init__(self, nodetracker, job_id=None):
        """Initialize the statster and schedule periodic sampling.
        
        nodetracker -- node tracker exposing the timer, task counters, the
                       HDFS interface and the shuffle output server.
        job_id      -- collect stats for this job only; None means global.
        """
        
        self.__log = ma.log.get_logger("ma.stats")
        self.__nt = nodetracker
        # sampling period (seconds), row-buffer depth, and the divisor that
        # converts raw byte counts into the logged unit (KBs) -- all from XML
        self.__interval = ma.const.XmlData.get_float_data(ma.const.xml_stats_interval)
        self.__buffer_length = ma.const.XmlData.get_int_data(ma.const.xml_stats_buffer_size)
        self.__file_size_div = ma.const.XmlData.get_float_data(ma.const.xml_stats_file_size_div)
        self.__timer_id = self.__nt.timer.addTimer(self.__interval, self.get_stats, [])
        # TODO: Rollover the log
        
        self.__job_id = job_id
        
        # column-wise buffer of samples awaiting flush to the log; kept as
        # parallel lists so __adjust_buffer can patch past rows in place
        self.__values = [[],    # time
                         [],    # running maps 
                         [],    # completed maps 
                         [],    # running reduces
                         [],    # completed reduces
                         [],    # KBs fetched from DFS
                         [],    # KBs shuffled from this NT
                         [],    # KBs written to DFS
                         [],    # KBs total shuffled
                         [],    # CPU user %
                         [],    # CPU system %
                         [],    # Network Rx KBs
                         []]    # Network Tx KBs
        
        # previous-sample task counts, used to reconcile missed spikes
        self.__prv_running_maps = 0
        self.__prv_completed_maps = 0
        self.__prv_running_reduces = 0
        self.__prv_completed_reduces = 0
        self.__total_rx = 0
        self.__total_tx = 0
        
        # seed the cumulative network counters so the first sample is a delta
        self.__total_rx, self.__total_tx = sysinfo.net_stats()
        
        # print header
        # NOTE: last two columns fixed to "Rx,Tx" to match the append order
        # in __values (the header previously listed Tx before Rx, mislabeling
        # the two network columns in the log)
        self.__log.info('time/epoch,running maps,completed maps,running redc.s,completed redc.s,DFS reads KBs,Shuffled out KBs,DFS writes KBS,Total shuffled KBS,CPU user %,CPU sys %,net Rx KBs,net Tx KBs')
    
    
    def get_stats(self):
        """Timer callback: take one sample of task, transfer and system
        statistics, append it to the buffer, and flush the oldest row to
        the log once the buffer is full.
        """
        
        # flush the oldest buffered row first so the buffer never exceeds
        # its configured length
        if len(self.__values[0]) >= self.__buffer_length:
            self.__log.info(self.__flush_oldest_stats_line())
            
        # get and reconcile running and completed maps
        running_maps, completed_maps = self.__nt.returnNoOfMaps(self.__job_id)
        running_maps, completed_maps = self.__adjust_running_completed(running_maps, 
                                                    completed_maps, self.__prv_running_maps, 
                                                    self.__prv_completed_maps)
        
        # get and reconcile running and completed reduces
        running_reduces, completed_reduces = self.__nt.returnNoOfReduces(self.__job_id)
        running_reduces, completed_reduces = self.__adjust_running_completed(running_reduces, 
                                                    completed_reduces, self.__prv_running_reduces, 
                                                    self.__prv_completed_reduces)
        
        # drain per-file transfer records accumulated since the last sample
        input_from_dfs_list = self.__nt.hdfs.flush_file_transfer_to_local_infos(self.__job_id)
        output_to_dfs_list = self.__nt.hdfs.flush_file_transfer_to_dfs_infos(self.__job_id)
        files_shuffled_list = self.__nt.output_server.flush_file_transfer_infos(self.__job_id)
        byts_tx_shuffled = int(float(self.__nt.output_server.return_bytes_transferred()) / self.__file_size_div)
        
        # system-level stats: CPU percentages and network byte deltas
        cpu_user, cpu_sys = sysinfo.cpu_topstats()
        rx, tx = sysinfo.net_stats(self.__total_rx, self.__total_tx)
        self.__total_rx += rx
        self.__total_tx += tx
        rx = int(float(rx) / self.__file_size_div)
        tx = int(float(tx) / self.__file_size_div)
        
        # append the new sample; columns 5-7 start at zero and are filled in
        # retroactively from the transfer records below
        self.__values[0].append(time.time())        # time
        self.__values[1].append(running_maps)       # running maps
        self.__values[2].append(completed_maps)     # completed maps
        self.__values[3].append(running_reduces)    # running reduces
        self.__values[4].append(completed_reduces)  # completed reduces
        self.__values[5].append(0)                  # initialize KBs fetched from DFS
        self.__values[6].append(0)                  # initialize KBs shuffled from this NT
        self.__values[7].append(0)                  # initialize KBs written to DFS
        self.__values[8].append(byts_tx_shuffled)   # KBs total shuffled
        self.__values[9].append(cpu_user)           # CPU user %
        self.__values[10].append(cpu_sys)           # CPU system %
        self.__values[11].append(rx)                # Network Rx KBs
        self.__values[12].append(tx)                # Network Tx KBs
        
        # spread each transfer's volume over the buffered rows it overlaps
        for filetxinfo in input_from_dfs_list:
            self.__adjust_buffer(filetxinfo, self.__values[5], self.__values[0])
        
        for filetxinfo in files_shuffled_list:
            self.__adjust_buffer(filetxinfo, self.__values[6], self.__values[0])
        
        for filetxinfo in output_to_dfs_list:
            self.__adjust_buffer(filetxinfo, self.__values[7], self.__values[0])
                
        # remember this sample's task counts for the next reconciliation
        self.__prv_running_maps, self.__prv_completed_maps = running_maps, completed_maps
        self.__prv_running_reduces, self.__prv_completed_reduces = running_reduces, completed_reduces 
                
    
    def __adjust_buffer(self, file_tx_info, lst, time_lst):
        """Spread the volume of one file transfer over the buffered rows
        whose sample timestamps the transfer overlaps.
        
        file_tx_info -- record with start_time, end_time and file_size
        lst          -- the __values column to add the transfer rate into
        time_lst     -- the parallel timestamp column (__values[0])
        """
        
        # map the transfer's start/end times onto buffered row indices
        start_idx = listutils.list_pos(time_lst, file_tx_info.start_time) - 1
        end_idx = listutils.list_pos(time_lst, file_tx_info.end_time) - 1
        
        if start_idx < 0:
            print("STATSTER ---> The stats buffer size is too small ->", file_tx_info.start_time, file_tx_info.end_time)
            start_idx = 0
        
        if end_idx >= len(time_lst):
            print("STATSTER ---> The time given exceeds the latest one in stats ->", file_tx_info.start_time, file_tx_info.end_time)
            end_idx = len(time_lst) - 1
        
        # The rate is averaged over one extra sample period because a row's
        # stats describe the interval that FOLLOWS its timestamp.  When the
        # transfer ends at the newest row there is no later timestamp to
        # read, so approximate the trailing period with the configured
        # sampling interval.  (The old code unconditionally indexed
        # time_lst[end_idx+1] and raised IndexError in that case.)
        if end_idx + 1 < len(time_lst):
            time_period = time_lst[end_idx + 1] - time_lst[start_idx]
        else:
            time_period = (time_lst[end_idx] - time_lst[start_idx]) + self.__interval
        if time_period <= 0:
            # degenerate timestamps; avoid division by zero below
            time_period = self.__interval
        transfer_rate = int((float(file_tx_info.file_size) / self.__file_size_div) / time_period)
        
        # add the averaged rate into every row the transfer overlaps
        for idx in range(start_idx, end_idx + 1):
            lst[idx] = lst[idx] + transfer_rate  
            
    
    def __adjust_running_completed(self, running, completed, running_prv, completed_prv):
        """Reconcile task counts sampled one interval apart.
        
        If more tasks completed than the drop in the running count accounts
        for, some tasks started AND finished between samples; shift that
        discrepancy from the completed count back into the running count so
        the logged series does not hide the spike.  Returns the (possibly
        adjusted) (running, completed) pair.
        """
        
        if completed - completed_prv != 0:
            if completed - completed_prv != running_prv - running:
                # completions already explained by the running-count drop
                got_done = 0
                if running_prv > running:
                    got_done = running_prv - running
                discrepancy = (completed - completed_prv) - got_done
                return running + discrepancy, completed - discrepancy
        
        return running, completed

    
    def __flush_oldest_stats_line(self):
        """Pop the oldest buffered sample and return it as a comma-separated
        string ready for the log, or None if the buffer is empty.
        """
        
        # check if data is available for flushing to log
        if len(self.__values[0]) == 0:
            print("STATSTER ---> The values list has no data to flush to the log")
            return None
        
        # pop the head of every column and join into one CSV row
        fields = []
        for lst in self.__values:
            fields.append(str(lst[0]))
            del lst[0]
            
        return ",".join(fields)
    
        
    def stop_statster(self):
        """Stop collecting statistics: write every remaining buffered row to
        the log, then cancel the sampling timer.
        """
        
        # drain the whole buffer into the log (the old code popped only one
        # row and discarded its text without logging it)
        while len(self.__values[0]) > 0:
            self.__log.info(self.__flush_oldest_stats_line())
        
        self.__nt.timer.removeTimer(self.__timer_id)
        
        